﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_cityind_sum_t1.sas 5 &

Calculate the employment of top 1 city-ind firm that is also a top 10% ind firm.
*/

libname hr "/projects/hsieh_project/proj_201809/data/";

%Let dir_out = /projects/hsieh_project/proj_201809/data/;

/*
================================================================================
Load Raw Data
*/

%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;
%m_read(param_dev=1,param_lyear=0, param_czone=1, param_msa1983=1, param_msacz=1, param_drop=1);

/*
================================================================================
Macro that aggregates data
*/

%macro cind_sum(dt_in= , dt_out= , c_city= , l_ind_perc= , l_cind_rank= );

%put Top Industry Firms;
%put Input &dt_in.;
%put Output &dt_out.;
%put City &c_city.;
%put Percentile &l_ind_perc.;

data dt_proc;
  set &dt_in.(keep=year &c_city. ch_ind sector firmnum lbdid worker est salary indf_rank indf_perc);
run;

/*
--------------------------------------------------------------------------------
Aggregate to year-industry-city-firm level
*/
proc sort data=dt_proc;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
run;

proc means data=dt_proc noprint;
  by year sector ch_ind &c_city. firmnum indf_rank indf_perc;
  output out=dt_indf(drop=_type_ _freq_) sum(worker est salary)=emp_cind est_cind pay_cind;
run;

/*
--------------------------------------------------------------------------------
Generate firm rank in city-industry
*/
proc sort data=dt_indf; by year &c_city. ch_ind indf_rank;

data dt_indf;
  set dt_indf;
  by year &c_city. ch_ind;
  if first.ch_ind then cindf_rank=0;
  cindf_rank+1;
run;

/*
--------------------------------------------------------------------------------
Keep firms that is a top 1 city-ind firm and also a top 10% ind firm
*/

data &dt_out.;
  set dt_indf;
  if cindf_rank = 1 & indf_perc <= 0.1;
run;

data &dt_out.;
  set &dt_out. (keep=year sector ch_ind &c_city. emp_cind est_cind pay_cind);
  rename emp_cind=emp_cind_t1_10 est_cind=est_cind_t1_10 pay_cind=pay_cind_t1_10;
run;

%mend;

/*
================================================================================
Macro that aggregates data for a given geo definition
*/

%macro m_main(vgeo= );

%put --------------------------------------------------------------------------------;
%put Geo Variable: &vgeo.;

%cind_sum(dt_in=lbd, dt_out=lbd_cind_&vgeo., c_city=&vgeo., l_ind_perc=10 5 1, l_cind_rank=1);

data lbd_cind_&vgeo.;
  set lbd_cind_&vgeo.;
  drop _type_ _freq_;
run;

proc export data=lbd_cind_&vgeo. outfile="&dir_out./cityind_sum_&vgeo._all_t1.dta" replace;
run;

%mend;

/*
================================================================================
Run the macro
*/

/*
--------------------------------------------------------------------------------
MSA and FIPS
*/

data lbd;
  set lbd;
  runif=runif_1;
run;

%m_main(vgeo=msa);
%m_main(vgeo=fips);

%m_main(vgeo=msa1983);


/*
--------------------------------------------------------------------------------
Communting zones
*/

%m_main(vgeo=czone);
%m_main(vgeo=msa1983cz);

/*
--------------------------------------------------------------------------------
Zip codes
*/

/* 5-dig zip */
data lbd;
   set lbd;
   if zipcode = "" then delete;
   zipcode = substr(zipcode,1,5);
run;

%m_main(vgeo=zipcode);


/* End of sas file */
